From 210ba1a9d534aefed2db5b97a4c2c6fd59c921e3 Mon Sep 17 00:00:00 2001 From: "akw27@arcadians.cl.cam.ac.uk" Date: Wed, 23 Mar 2005 23:16:17 +0000 Subject: [PATCH] bitkeeper revision 1.1236.41.1 (4241f8c1RPqucowH4YAH-X69s_MQcw) add a metadata cache to the radix io calls. --- tools/blktap/Makefile | 2 +- tools/blktap/radix.c | 620 +++++++++++++++++++++++++++++++++++++++++- tools/blktap/radix.h | 3 + tools/blktap/vdi.c | 4 + 4 files changed, 622 insertions(+), 7 deletions(-) diff --git a/tools/blktap/Makefile b/tools/blktap/Makefile index 09fe2435dc..7f71a219bf 100644 --- a/tools/blktap/Makefile +++ b/tools/blktap/Makefile @@ -174,5 +174,5 @@ bb-tls: $(LIB) blockstore-benchmark.c bb-trans: $(LIB) blockstore-benchmark.c $(CC) $(CFLAGS) -o bb-trans blockstore-benchmark.c blockstore.c -lpthread -radix-test: $(LIB) radix.c blockstore-threaded-trans.c +radix-test: $(LIB) radix.c blockstore.c $(CC) $(CFLAGS) -g3 -D RADIX_STANDALONE -o radix-test radix.c blockstore-threaded-trans.c diff --git a/tools/blktap/radix.c b/tools/blktap/radix.c index c81af26959..44aa0ac482 100644 --- a/tools/blktap/radix.c +++ b/tools/blktap/radix.c @@ -13,6 +13,7 @@ #include #include #include +#include #include "blockstore.h" #include "radix.h" @@ -24,6 +25,10 @@ #define DEBUG */ +/* +#define STAGED +*/ + #define ZERO 0LL #define ONE 1LL #define ONEMASK 0xffffffffffffffeLL @@ -31,6 +36,203 @@ typedef u64 *radix_tree_node; + +/* Experimental radix cache. */ + +static pthread_mutex_t rcache_mutex = PTHREAD_MUTEX_INITIALIZER; +static int rcache_count = 0; +#define RCACHE_MAX 1024 + +typedef struct rcache_st { + radix_tree_node *node; + u64 id; + struct rcache_st *hash_next; + struct rcache_st *cache_next; + struct rcache_st *cache_prev; +} rcache_t; + +static rcache_t *rcache_head = NULL; +static rcache_t *rcache_tail = NULL; + +#define RCHASH_SIZE 512ULL +rcache_t *rcache[RCHASH_SIZE]; +#define RCACHE_HASH(_id) ((_id) & (RCHASH_SIZE - 1)) + +void __rcache_init(void) +{ + int i; +printf("rcache_init!\n"); + + for (i=0; iid == id) + { + memcpy(r->node, node, BLOCK_SIZE); + + /* bring to front. */ + if (r != rcache_head) { + + if (r == rcache_tail) { + if (r->cache_prev != NULL) rcache_tail = r->cache_prev; + rcache_tail->cache_next = NULL; + } + + tmp = r->cache_next; + if (r->cache_next != NULL) r->cache_next->cache_prev + = r->cache_prev; + if (r->cache_prev != NULL) r->cache_prev->cache_next = tmp; + + r->cache_prev = NULL; + r->cache_next = rcache_head; + if (rcache_head != NULL) rcache_head->cache_prev = r; + rcache_head = r; + } + +//printf("Update (%Ld)\n", r->id); + goto done; + } + r = r->hash_next; + } + + if ( rcache_count == RCACHE_MAX ) + { + /* Remove an entry */ + + r = rcache_tail; + if (r->cache_prev != NULL) rcache_tail = r->cache_prev; + rcache_tail->cache_next = NULL; + freeblock(r->node); + + curs = &rcache[RCACHE_HASH(r->id)]; + while ((*curs) != r) + curs = &(*curs)->hash_next; + *curs = r->hash_next; +//printf("Evict (%Ld)\n", r->id); + + } else { + + r = (rcache_t *)malloc(sizeof(rcache_t)); + rcache_count++; + } + + r->node = newblock(); + memcpy(r->node, node, BLOCK_SIZE); + r->id = id; + + r->hash_next = rcache[RCACHE_HASH(id)]; + rcache[RCACHE_HASH(id)] = r; + + r->cache_prev = NULL; + r->cache_next = rcache_head; + if (rcache_head != NULL) rcache_head->cache_prev = r; + rcache_head = r; + if (rcache_tail == NULL) rcache_tail = r; + +//printf("Added (%Ld, %p)\n", id, r->node); +done: + pthread_mutex_unlock(&rcache_mutex); +} + +radix_tree_node *rcache_read(u64 id) +{ + rcache_t *r, *tmp; + radix_tree_node *node = NULL; + + pthread_mutex_lock(&rcache_mutex); + + r = rcache[RCACHE_HASH(id)]; + + for (;;) { + if (r == NULL) { +//printf("Miss (%Ld)\n", id); + goto done; + } + if (r->id == id) break; + r = r->hash_next; + } + + /* bring to front. */ + if (r != rcache_head) + { + if (r == rcache_tail) { + if (r->cache_prev != NULL) rcache_tail = r->cache_prev; + rcache_tail->cache_next = NULL; + } + tmp = r->cache_next; + if (r->cache_next != NULL) r->cache_next->cache_prev = r->cache_prev; + if (r->cache_prev != NULL) r->cache_prev->cache_next = tmp; + + r->cache_prev = NULL; + r->cache_next = rcache_head; + if (rcache_head != NULL) rcache_head->cache_prev = r; + rcache_head = r; + } + + node = newblock(); + memcpy(node, r->node, BLOCK_SIZE); + +//printf("Hit (%Ld, %p)\n", id, r->node); +done: + pthread_mutex_unlock(&rcache_mutex); + + return(node); +} + + +void *rc_readblock(u64 id) +{ + void *ret; + + ret = (void *)rcache_read(id); + + if (ret != NULL) return ret; + + ret = readblock(id); + + if (ret != NULL) + rcache_write(id, ret); + + return(ret); +} + +u64 rc_allocblock(void *block) +{ + u64 ret; + + ret = allocblock(block); + + if (ret != ZERO) + rcache_write(ret, block); + + return(ret); +} + +int rc_writeblock(u64 id, void *block) +{ + int ret; + + ret = writeblock(id, block); + rcache_write(id, block); + + return(ret); +} + + /* * block device interface and other helper functions * with these functions, block id is just a 63-bit number, with @@ -74,6 +276,8 @@ radix_tree_node cloneblock(radix_tree_node block) { * * @return: value on success, zero on error */ +#ifndef STAGED + u64 lookup(int height, u64 root, u64 key) { radix_tree_node node; u64 mask = ONE; @@ -97,7 +301,7 @@ u64 lookup(int height, u64 root, u64 key) { return ZERO; oldroot = root; - node = (radix_tree_node) readblock(getid(root)); + node = (radix_tree_node) rc_readblock(getid(root)); if (node == NULL) return ZERO; @@ -114,6 +318,92 @@ u64 lookup(int height, u64 root, u64 key) { return ZERO; } +#else /* STAGED */ + + +/* non-recursive staged lookup, assume height is 35. */ +u64 lookup(int height, u64 root, u64 key) { + radix_tree_node node; + u64 mask = ONE; + +printf("lookup!\n"); + assert(key >> 35 == 0); + + /* the root block may be smaller to ensure all leaves are full */ + height = 27; + + /* now carve off equal sized chunks at each step */ + + /* ROOT: (LEVEL 0) KEYLEN=35*/ + if (getid(root) == ZERO) + return ZERO; + + node = (radix_tree_node) readblock(getid(root)); + if (node == NULL) + return ZERO; + + root = node[(key >> height) & RADIX_TREE_MAP_MASK]; + mask &= root; + freeblock(node); + + if (height == 0) + return ( root & ONEMASK ) | mask; + + height -= RADIX_TREE_MAP_SHIFT; /* == 18 */ + + /* LEVEL 1: KEYLEN=26*/ + if (getid(root) == ZERO) + return ZERO; + + node = (radix_tree_node) readblock(getid(root)); + if (node == NULL) + return ZERO; + + root = node[(key >> height) & RADIX_TREE_MAP_MASK]; + mask &= root; + freeblock(node); + + if (height == 0) + return ( root & ONEMASK ) | mask; + + height -= RADIX_TREE_MAP_SHIFT; /* == 9 */ + + /* LEVEL 2: KEYLEN=17*/ + if (getid(root) == ZERO) + return ZERO; + + node = (radix_tree_node) readblock(getid(root)); + if (node == NULL) + return ZERO; + + root = node[(key >> height) & RADIX_TREE_MAP_MASK]; + mask &= root; + freeblock(node); + + if (height == 0) + return ( root & ONEMASK ) | mask; + + height -= RADIX_TREE_MAP_SHIFT; /* == 0 */ + + /* LEVEL 3: KEYLEN=8*/ + if (getid(root) == ZERO) + return ZERO; + + node = (radix_tree_node) readblock(getid(root)); + if (node == NULL) + return ZERO; + + root = node[(key >> height) & RADIX_TREE_MAP_MASK]; + mask &= root; + freeblock(node); + + // if (height == 0) + return ( root & ONEMASK ) | mask; + +} + +#endif + /* * update: set a radix tree entry, doing copy-on-write as necessary * @height: height in bits of the radix tree @@ -123,6 +413,10 @@ u64 lookup(int height, u64 root, u64 key) { * * @returns: (possibly new) root id on success (with LSB=1), 0 on failure */ + +#ifndef STAGED + + u64 update(int height, u64 root, u64 key, u64 val) { int offset; u64 child; @@ -145,7 +439,7 @@ u64 update(int height, u64 root, u64 key, u64 val) { if (root == ZERO) { node = (radix_tree_node) newblock(); } else { - node = (radix_tree_node) readblock(getid(root)); + node = (radix_tree_node) rc_readblock(getid(root)); if (!iswritable(root)) { /* need to clone this node */ @@ -181,10 +475,10 @@ u64 update(int height, u64 root, u64 key, u64 val) { /* new/cloned blocks need to be saved */ if (root == ZERO) { /* mark this as an owned block */ - root = allocblock(node); + root = rc_allocblock(node); if (root) root = writable(root); - } else if (writeblock(getid(root), node) < 0) { + } else if (rc_writeblock(getid(root), node) < 0) { freeblock(node); return ZERO; } @@ -193,6 +487,320 @@ u64 update(int height, u64 root, u64 key, u64 val) { return root; } + +#else /* STAGED */ + +/* When update is called, state->next points to the thing to call after + * update is finished. */ + +struct cb_state_st; + +typedef struct { + /* public stuff */ + u64 val; + u64 key; + u64 result; + + /* internal state */ + u64 root[4]; + radix_tree_node node[4]; + void (*next)(struct cb_state_st *); + int err; +} radix_update_t; + +typedef struct cb_state_st{ + void (*next)(struct cb_state_st *); /* Next continuation. */ + union { + radix_update_t update; + } radix; +} cb_state_t; + +void s_readblock(cb_state_t *state, u64 id, void **ret) +{ + *ret = readblock(id); + state->next(state); +} + +void s_allocblock(cb_state_t *state, void *block, u64 *ret) +{ + *ret = allocblock(block); + state->next(state); +} + +void s_writeblock(cb_state_t *state, u64 id, void *block, int *ret) +{ + *ret = writeblock(id, block); + state->next(state); +} + +void cb_done(cb_state_t *state) +{ + printf("*** done ***\n"); +} + +/* forward prototypes. */ +void up0(cb_state_t *state); +void up1(cb_state_t *state); +void up2(cb_state_t *state); +void up3(cb_state_t *state); +void up4(cb_state_t *state); +void up5(cb_state_t *state); +void up6(cb_state_t *state); +void up7(cb_state_t *state); +void up8(cb_state_t *state); +void up9(cb_state_t *state); +void up10(cb_state_t *state); +void up11(cb_state_t *state); +void up12(cb_state_t *state); + +u64 update(int height, u64 root, u64 key, u64 val) +{ + cb_state_t state; + radix_update_t *u = &state.radix.update; + + u->val = val; + u->key = key; + u->root[0] = root; + u->root[1] = u->root[2] = u->root[3] = ZERO; + u->node[0] = u->node[1] = u->node[2] = u->node[3] = NULL; + + /* take a copy of the higher-scoped next continuation. */ + u->next = state->next; + + /* update start state */ + state->next = up0; + + for (;;) + { + state->next(state); + if (state->next == NULL) + break; + } + + return u->result; +} + +/* c0:*/ +void up0(cb_state_t *state) { + radix_update_t *u = &state->radix.update; + + state->next = up1; + s_readblock(state, getid(u->root[0]), (void **)&(u->node[0])); +} + +/* c1: */ +void up1(cb_state_t *state) { + radix_update_t *u = &state->radix.update; + + u->root[1] = u->node[0][u->key >> 27 & RADIX_TREE_MAP_MASK]; + if (u->root[1] == ZERO) { + u->node[1] = (radix_tree_node) newblock(); + /* goto next continuation (c2)*/ up2(state);return; + } else { + state->next = up2; + s_readblock(state, getid(u->root[1]), (void **)&(u->node[1])); + } +} + +/* c2: */ +void up2(cb_state_t *state) { + radix_update_t *u = &state->radix.update; + + if ((u->root[1] != ZERO) && (!iswritable(u->root[1]))) { + /* need to clone this node */ + radix_tree_node oldnode = u->node[1]; + u->node[1] = cloneblock(u->node[1]); + freeblock(oldnode); + u->root[1] = ZERO; + } + u->root[2] = u->node[1][u->key >> 18 & RADIX_TREE_MAP_MASK]; + if (u->root[2] == ZERO) { + u->node[2] = (radix_tree_node) newblock(); + /* goto next continuation (c3)*/ up3(state);return; + } else { + state->next = up3; + s_readblock(state, getid(u->root[2]), (void **)&(u->node[2])); + } +} + +/* c3: */ +void up3(cb_state_t *state) { + radix_update_t *u = &state->radix.update; + + if ((u->root[2] != ZERO) && (!iswritable(u->root[2]))) { + /* need to clone this node */ + radix_tree_node oldnode = u->node[2]; + u->node[2] = cloneblock(u->node[2]); + freeblock(oldnode); + u->root[2] = ZERO; + } + u->root[3] = u->node[2][u->key >> 9 & RADIX_TREE_MAP_MASK]; + if (u->root[3] == ZERO) { + u->node[3] = (radix_tree_node) newblock(); + /* goto next continuation (c4)*/ up4(state);return; + } else { + state->next = up4; + s_readblock(state, getid(u->root[3]), (void **)&(u->node[3])); + } +} + +/* c4: */ +void up4(cb_state_t *state) { + radix_update_t *u = &state->radix.update; + + if ((u->root[3] != ZERO) && (!iswritable(u->root[3]))) { + /* need to clone this node */ + radix_tree_node oldnode = u->node[3]; + u->node[3] = cloneblock(u->node[3]); + freeblock(oldnode); + u->root[3] = ZERO; + } + + if (u->node[3][u->key & RADIX_TREE_MAP_MASK] == u->val){ + /* no change, so we already owned the child */ + /* goto last continuation (c12) */ up12(state);return; + } + + u->node[3][u->key & RADIX_TREE_MAP_MASK] = u->val; + + /* new/cloned blocks need to be saved */ + if (u->root[3] == ZERO) { + /* mark this as an owned block */ + state->next = up5; + s_allocblock(state, u->node[3], &u->root[3]); + /* goto continuation (c5) */ return; + } else { + state->next = up6; + s_writeblock(state, getid(u->root[3]), u->node[3], &u->err); + /* goto continuation (c6) */ return; + } +} + +/* c5: */ +void up5(cb_state_t *state) { + radix_update_t *u = &state->radix.update; + + if (u->root[3]) + u->root[3] = writable(u->root[3]); + /* goto continuation (c6) */ up6(state);return; +} + +/* c6: */ +void up6(cb_state_t *state) { + radix_update_t *u = &state->radix.update; + + if (u->node[2][u->key >> 9 & RADIX_TREE_MAP_MASK] == u->root[3]){ + /* no change, so we already owned the child */ + /* goto last continuation (c12) */ up12(state);return; + } + + u->node[2][u->key >> 9 & RADIX_TREE_MAP_MASK] = u->root[3]; + + /* new/cloned blocks need to be saved */ + if (u->root[2] == ZERO) { + /* mark this as an owned block */ + state->next = up7; + s_allocblock(state, u->node[2], &u->root[2]); + /* goto continuation (c7) */return; + } else { + state->next = up8; + s_writeblock(state, getid(u->root[2]), u->node[2], &u->err); + /* goto continuation (c8) */return; + } +} + +/* c7: */ +void up7(cb_state_t *state) { + radix_update_t *u = &state->radix.update; + + if (u->root[2]) + u->root[2] = writable(u->root[2]); + /* goto continuation (c8) */ up8(state);return; +} + +/* c8: */ +void up8(cb_state_t *state) { + radix_update_t *u = &state->radix.update; + + if (u->node[1][u->key >> 18 & RADIX_TREE_MAP_MASK] == u->root[2]){ + /* no change, so we already owned the child */ + /* goto last continuation (c12) */ up12(state);return; + } + + u->node[1][u->key >> 18 & RADIX_TREE_MAP_MASK] = u->root[2]; + + /* new/cloned blocks need to be saved */ + if (u->root[1] == ZERO) { + /* mark this as an owned block */ + state->next = up9; + s_allocblock(state, u->node[1], &u->root[1]); + /* goto continuation (c9) */return; + } else { + state->next = up10; + s_writeblock(state, getid(u->root[1]), u->node[1], &u->err); + /* goto continuation (c10) */return; + } +} + +/* c9: */ +void up9(cb_state_t *state) { + radix_update_t *u = &state->radix.update; + + if (u->root[1]) + u->root[1] = writable(u->root[1]); + /* goto continuation (c10) */ up10(state);return; +} + +/* c10: */ +void up10(cb_state_t *state) { + radix_update_t *u = &state->radix.update; + + if (u->node[0][u->key >> 27 & RADIX_TREE_MAP_MASK] == u->root[1]){ + /* no change, so we already owned the child */ + /* goto last continuation (c12) */ up12(state);return; + } + + u->node[0][u->key >> 27 & RADIX_TREE_MAP_MASK] = u->root[1]; + + /* new/cloned blocks need to be saved */ + if (u->root[0] == ZERO) { + /* mark this as an owned block */ + state->next = up11; + s_allocblock(state, u->node[0], &u->root[0]); + /* goto continuation (c11) */ return; + } else { + state->next = up10; + s_writeblock(state, getid(u->root[0]), u->node[0], &u->err); + /* goto continuation (c12) */ return; + } +} + +/* c11: */ +void up11(cb_state_t *state) { + radix_update_t *u = &state->radix.update; + + if (u->root[0]) + u->root[0] = writable(u->root[0]); + /* goto continuation (c12) */ up12(state);return; +} + +/* c12: */ +void up12(cb_state_t *state) { + radix_update_t *u = &state->radix.update; + + int i; + for (i=0;i<4;i++) + if(u->node[i] != NULL) freeblock(u->node[i]); + + u->result = u->root[0]; + state->next = u->next; + + state->next(state);return; +} + +#endif + + /** * snapshot: create a snapshot * @root: old root node @@ -202,7 +810,7 @@ u64 update(int height, u64 root, u64 key, u64 val) { u64 snapshot(u64 root) { radix_tree_node node, newnode; - if ((node = readblock(getid(root))) == NULL) + if ((node = rc_readblock(getid(root))) == NULL) return ZERO; newnode = cloneblock(node); @@ -210,7 +818,7 @@ u64 snapshot(u64 root) { if (newnode == NULL) return ZERO; - root = allocblock(newnode); + root = rc_allocblock(newnode); freeblock(newnode); if (root == ZERO) diff --git a/tools/blktap/radix.h b/tools/blktap/radix.h index 7feaf0c316..e7de3f453e 100644 --- a/tools/blktap/radix.h +++ b/tools/blktap/radix.h @@ -29,4 +29,7 @@ u64 snapshot(u64 root); int collapse(int height, u64 proot, u64 croot); int isprivate(int height, u64 root, u64 key); + +void __rcache_init(void); + #endif /* __RADIX_H__ */ diff --git a/tools/blktap/vdi.c b/tools/blktap/vdi.c index 7c6c63094c..b3a84f0244 100644 --- a/tools/blktap/vdi.c +++ b/tools/blktap/vdi.c @@ -171,6 +171,9 @@ void vdi_snapshot(vdi_t *vdi) int __init_vdi() { + /* sneak this in here for the moment. */ + __rcache_init(); + /* force the registry to be created if it doesn't exist. */ vdi_registry_t *vdi_reg = get_vdi_registry(); if (vdi_reg == NULL) { @@ -179,6 +182,7 @@ int __init_vdi() } freeblock(vdi_reg); + return 0; } -- 2.30.2